In [48]:
import numpy as np
import pandas as pd
import scanpy as sc
import scvelo as scv
sc.settings.verbosity = 3 
sc.logging.print_header()
sc.settings.set_figure_params(dpi=80, facecolor='white')
scv.logging.print_version()
scv.settings.verbosity = 3  # show errors(0), warnings(1), info(2), hints(3)
scv.settings.presenter_view = True  # set max width size for presenter view
scv.settings.set_figure_params('scvelo')  # for beautified visualization
results_file = 'results_HL.h5ad'
adata_r = sc.read_10x_mtx('/Users/bahawarsdhillon/Desktop/BIO 257 - Applied Genomics/Scanpy-Project/filtered_feature_bc_matrix/', var_names='gene_symbols',cache=False)
adata_r.var_names_make_unique()
adata_r

velocity = scv.read("/Users/bahawarsdhillon/Desktop/BIO 257 - Applied Genomics/Scanpy-Project/Parent_NGSC3_DI_HodgkinsLymphoma_possorted_genome_bam_JLA4X.loom")

adata = scv.utils.merge(adata_r, velocity)
scanpy==1.6.0 anndata==0.7.4 umap==0.4.6 numpy==1.19.2 scipy==1.5.3 pandas==1.1.3 scikit-learn==0.23.2 statsmodels==0.12.0 python-igraph==0.8.3 leidenalg==0.8.2
--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.
--> This might be very slow. Consider passing `cache=True`, which enables much faster reading from a cache file.
Variable names are not unique. To make them unique, call `.var_names_make_unique`.
In [49]:
adata_r = scv.utils.merge(adata_r, velocity)
In [50]:
adata_r
Out[50]:
AnnData object with n_obs × n_vars = 3394 × 36601
    obs: 'initial_size_spliced', 'initial_size_unspliced', 'initial_size'
    var: 'gene_ids', 'feature_types', 'Accession', 'Chromosome', 'End', 'Start', 'Strand'
    layers: 'matrix', 'ambiguous', 'spliced', 'unspliced'
In [51]:
sc.pp.filter_cells(adata_r, min_genes=200)
sc.pp.filter_genes(adata_r, min_cells=3)
sc.pp.filter_cells(adata_r, max_counts=39766)
sc.pp.filter_cells(adata_r, max_genes=5942)
adata_r.var['mt'] = adata_r.var_names.str.startswith('MT-')  # annotate the group of mitochondrial genes as 'mt'
sc.pp.calculate_qc_metrics(adata_r, qc_vars=['mt'], percent_top=None, log1p=False, inplace=True)
adata = adata_r[adata_r.obs.pct_counts_mt < 10, :]
filtered out 413 cells that have less than 200 genes expressed
filtered out 16445 genes that are detected in less than 3 cells
filtered out 56 cells that have more than 39766 counts
filtered out 10 cells that have more than 5942 genes expressed
In [52]:
sc.pp.normalize_total(adata, target_sum=1e4)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)
adata.raw = adata
sc.pp.regress_out(adata, 'pct_counts_mt')
sc.pp.scale(adata, max_value=10)
cell_cycle_genes = [x.strip() for x in open('cell_cycle.txt')]

# Split into 2 lists
s_genes = cell_cycle_genes[:43]
g2m_genes = cell_cycle_genes[43:]

cell_cycle_genes = [x for x in cell_cycle_genes if x in adata.var_names]
sc.tl.score_genes_cell_cycle(adata, s_genes=s_genes, g2m_genes=g2m_genes)
sc.tl.pca(adata, svd_solver='arpack')
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=40)
sc.tl.umap(adata)
normalizing counts per cell
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
regressing out pct_counts_mt
    sparse input is densified and may lead to high memory use
... storing 'feature_types' as categorical
... storing 'Chromosome' as categorical
... storing 'Strand' as categorical
    finished (0:01:11)
calculating cell cycle phase
computing score 'S_score'
WARNING: genes are not in var_names and ignored: ['MLF1IP']
    finished: added
    'S_score', score of gene set (adata.obs).
    645 total control genes are used. (0:00:00)
computing score 'G2M_score'
WARNING: genes are not in var_names and ignored: ['FAM64A', 'HN1']
    finished: added
    'G2M_score', score of gene set (adata.obs).
    769 total control genes are used. (0:00:00)
-->     'phase', cell cycle phase (adata.obs)
computing PCA
    on highly variable genes
    with n_comps=50
    finished (0:00:00)
computing neighbors
    using 'X_pca' with n_pcs = 40
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
computing UMAP
    finished: added
    'X_umap', UMAP coordinates (adata.obsm) (0:00:03)
In [53]:
adata.write("filtered.h5ad")
#adata = sc.read_h5ad("/Users/bahawarsdhillon/Desktop/BIO 257 - Applied Genomics/Scanpy-Project/filtered.h5ad")
... storing 'phase' as categorical
... storing 'feature_types' as categorical
... storing 'Chromosome' as categorical
... storing 'Strand' as categorical
In [54]:
sc.tl.leiden(adata, resolution=0.5)
sc.pl.umap(adata, color=['leiden'])
running Leiden clustering
    finished: found 11 clusters and added
    'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
In [56]:
sc.tl.score_genes(adata, ["CD4"], score_name="CD4X")
cd4s = adata[adata.obs.CD4X > 0, :]
cd4s
sc.pl.umap(cd4s, color="leiden")
sc.pl.umap(adata, color="leiden")
computing score 'CD4X'
    finished: added
    'CD4X', score of gene set (adata.obs).
    50 total control genes are used. (0:00:00)
In [57]:
sc.tl.score_genes(adata, ["CD8A"], score_name="CD8X")
tcells = adata[adata.obs.CD8X > 0, :]
tcells
sc.pl.umap(tcells, color="leiden")
sc.pl.umap(adata, color="leiden")
computing score 'CD8X'
    finished: added
    'CD8X', score of gene set (adata.obs).
    50 total control genes are used. (0:00:00)
In [58]:
sc.pl.umap(cd4s, color="CD8A", color_map="magma")
In [59]:
sc.pl.umap(tcells, color="CD4", color_map="magma")
In [61]:
sc.pl.umap(tcells, color="CD4", color_map="magma")
In [62]:
sc.tl.score_genes(adata, ["CD3E"], score_name="tvel")
tvel = adata[adata.obs.tvel > 0, :]
tvel
sc.pl.umap(tvel, color="leiden")
sc.pl.umap(adata, color="leiden")
computing score 'tvel'
    finished: added
    'tvel', score of gene set (adata.obs).
    50 total control genes are used. (0:00:00)
In [63]:
tvel
Out[63]:
View of AnnData object with n_obs × n_vars = 1053 × 20156
    obs: 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'n_genes', 'n_counts', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'S_score', 'G2M_score', 'phase', 'leiden', 'CD4X', 'CD8X', 'tvel'
    var: 'gene_ids', 'feature_types', 'Accession', 'Chromosome', 'End', 'Start', 'Strand', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'matrix', 'ambiguous', 'spliced', 'unspliced'
    obsp: 'distances', 'connectivities'
In [65]:
scv.pl.proportions(tvel, groupby="leiden")
In [66]:
scv.pp.moments(tvel, n_pcs=30, n_neighbors=30)
scv.tl.recover_dynamics(tvel)
scv.tl.velocity(tvel, mode='dynamical')
scv.tl.velocity_graph(tvel)

tvel.write('/Users/bahawarsdhillon/Desktop/BIO 257 - Applied Genomics/Scanpy-Project/tvel.h5ad')
WARNING: Did not normalize X as it looks processed already. To enforce normalization, set `enforce=True`.
Normalized count data: spliced, unspliced.
computing neighbors
    finished (0:00:00) --> added 
    'distances' and 'connectivities', weighted adjacency matrices (adata.obsp)
computing moments based on connectivities
    finished (0:00:01) --> added 
    'Ms' and 'Mu', moments of un/spliced abundances (adata.layers)
recovering dynamics
    finished (0:09:11) --> added 
    'fit_pars', fitted parameters for splicing dynamics (adata.var)
computing velocities
    finished (0:00:03) --> added 
    'velocity', velocity vectors for each individual cell (adata.layers)
computing velocity graph
    finished (0:00:06) --> added 
    'velocity_graph', sparse matrix with cosine correlations (adata.uns)
In [67]:
scv.pl.velocity_embedding_stream(tvel, basis='umap', color="leiden")
computing velocity embedding
    finished (0:00:00) --> added
    'velocity_umap', embedded velocity vectors (adata.obsm)
In [68]:
scv.pl.velocity_embedding(tvel, arrow_length=3, arrow_size=2, dpi=120, color="leiden")
In [69]:
scv.tl.velocity_confidence(tvel)
keys = 'velocity_length', 'velocity_confidence'
scv.pl.scatter(tvel, c=keys, cmap='coolwarm', perc=[5, 95])
--> added 'velocity_length' (adata.obs)
--> added 'velocity_confidence' (adata.obs)
--> added 'velocity_confidence_transition' (adata.obs)
In [70]:
scv.tl.latent_time(tvel)
top_genes = tvel.var['fit_likelihood'].sort_values(ascending=False).index
scv.pl.scatter(tvel, basis=top_genes[:15], ncols=5, frameon=False, color="leiden")
computing terminal states
    identified 0 region of root cells and 1 region of end points .
    finished (0:00:00) --> added
    'root_cells', root cells of Markov diffusion process (adata.obs)
    'end_points', end points of Markov diffusion process (adata.obs)
WARNING: No root cells detected. Consider specifying root cells to improve latent time prediction.
computing latent time using root_cells as prior
    finished (0:00:02) --> added 
    'latent_time', shared time (adata.obs)
In [72]:
scv.pl.velocity(tvel, var_names= ["IL2RA"], color="leiden")
In [73]:
scv.logging.print_version()
scv.settings.verbosity = 3  # show errors(0), warnings(1), info(2), hints(3)
scv.settings.presenter_view = True  # set max width size for presenter view
scv.settings.set_figure_params('scvelo')  # for beautified visualization
Running scvelo 0.2.2 (python 3.8.3) on 2020-12-06 22:21.
In [74]:
scv.pl.velocity(tvel, var_names= ["IL2RA"], color="leiden")
In [75]:
scv.pl.velocity(tvel, var_names= ["IFNG", "TBX21"], color="leiden")
In [77]:
#naive cells
scv.pl.velocity(tvel, var_names= ["CCR7", "IL7R", "LEF1"], color="leiden")
In [79]:
#Treg markers
scv.pl.velocity(tvel, var_names= ["IL2RA", "FOXP3", "CTLA4", "LAG3", "TNFRSF18", "IKZF2", "IKZF4", "LGMN"], color="leiden")
In [84]:
sc.pl.umap(tvel, color="CD8B", color_map="magma")
In [85]:
#CD8 cytotoxic markers
scv.pl.velocity(tvel, var_names= ["GZMK", "GNLY", "GZMA"], color="leiden")
In [86]:
sc.tl.score_genes(cd4s, ["CD8A"], score_name="CD8X2")
dp = cd4s[cd4s.obs.CD8X2 > 0, :]
dp
sc.pl.umap(dp, color="leiden")
sc.pl.umap(cd4s, color="leiden")
computing score 'CD8X2'
Trying to set attribute `.obs` of view, copying.
    finished: added
    'CD8X2', score of gene set (adata.obs).
    50 total control genes are used. (0:00:02)
In [87]:
dp
Out[87]:
View of AnnData object with n_obs × n_vars = 53 × 20156
    obs: 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'n_genes', 'n_counts', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'S_score', 'G2M_score', 'phase', 'leiden', 'CD4X', 'CD8X2'
    var: 'gene_ids', 'feature_types', 'Accession', 'Chromosome', 'End', 'Start', 'Strand', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'matrix', 'ambiguous', 'spliced', 'unspliced'
    obsp: 'distances', 'connectivities'
In [95]:
ndp
Out[95]:
AnnData object with n_obs × n_vars = 717 × 20156
    obs: 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'n_genes', 'n_counts', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'S_score', 'G2M_score', 'phase', 'leiden', 'CD4X', 'CD8X2'
    var: 'gene_ids', 'feature_types', 'Accession', 'Chromosome', 'End', 'Start', 'Strand', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'matrix', 'ambiguous', 'spliced', 'unspliced'
    obsp: 'distances', 'connectivities'
In [90]:
merge = dp.concatenate(ndp)
merge
Out[90]:
AnnData object with n_obs × n_vars = 770 × 20156
    obs: 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'n_genes', 'n_counts', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'S_score', 'G2M_score', 'phase', 'leiden', 'CD4X', 'CD8X2', 'batch'
    var: 'gene_ids', 'feature_types', 'Accession', 'Chromosome', 'End', 'Start', 'Strand', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'mean', 'std', 'dispersions-0', 'dispersions_norm-0', 'dispersions-1', 'dispersions_norm-1'
    obsm: 'X_pca', 'X_umap'
    layers: 'matrix', 'ambiguous', 'spliced', 'unspliced'
In [103]:
#batch 0 is CD4 and CD8 double positive cells
sc.pl.violin(merge, keys=["IL21", "IL4", "IL2RA"], groupby="batch")
In [110]:
sc.pl.matrixplot(merge, var_names=["IL2RA", "CD74"], groupby="batch")

sc.pl.matrixplot(merge, var_names=["TNFRSF8", "CD274", "IRF4", "PAX5", "FUT4"], groupby="batch")
In [102]:
sc.tl.rank_genes_groups(merge, "batch", method="wilcoxon")

#variably expressed genes for the double positive batch are mostly associated with various cancers???
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:01)
Out[102]:
0 1
0 CD8A RPL13
1 MALT1 RPS6
2 IL2RA RPS23
3 SEC14L1 RPL39
4 PMAIP1 RPL36
5 RTKN2 RPS14
6 ATP2C1 RPL36A
7 CLDND1 RPS18
8 CD74 RPLP2
9 RGS2 RPLP1
In [114]:
pd.DataFrame(merge.uns['rank_genes_groups']['names']).head(20)
Out[114]:
0 1
0 CD8A RPL13
1 MALT1 RPS6
2 IL2RA RPS23
3 SEC14L1 RPL39
4 PMAIP1 RPL36
5 RTKN2 RPS14
6 ATP2C1 RPL36A
7 CLDND1 RPS18
8 CD74 RPLP2
9 RGS2 RPLP1
10 HLA-A RPS21
11 KLF3 RPL30
12 MIR4435-2HG RPL34
13 ATP5MC3 RPS7
14 DUSP5 RPL18A
15 HLA-DRB1 RPS28
16 MYL6 RPS3A
17 TENT5C RPS10
18 HLA-DQB1 MGAT4A
19 CCM2 EEF1D
In [113]:
 
In [115]:
sc.pl.umap(adata, color="leiden")
In [116]:
bcells = adata[adata.obs["leiden"].isin(["2"])]

sc.tl.score_genes(bcells, ["TNFRSF8", "CD274", "IRF4", "PAX5", "FUT4"], score_name="HRS")

sc.pl.umap(bcells, color="HRS", color_map="magma")
computing score 'HRS'
Trying to set attribute `.obs` of view, copying.
    finished: added
    'HRS', score of gene set (adata.obs).
    200 total control genes are used. (0:00:01)
In [117]:
adata
Out[117]:
AnnData object with n_obs × n_vars = 2359 × 20156
    obs: 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'n_genes', 'n_counts', 'n_genes_by_counts', 'total_counts', 'total_counts_mt', 'pct_counts_mt', 'S_score', 'G2M_score', 'phase', 'leiden', 'CD4X', 'CD8X', 'tvel'
    var: 'gene_ids', 'feature_types', 'Accession', 'Chromosome', 'End', 'Start', 'Strand', 'n_cells', 'mt', 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'mean', 'std'
    uns: 'log1p', 'hvg', 'pca', 'neighbors', 'umap', 'leiden', 'leiden_colors'
    obsm: 'X_pca', 'X_umap'
    varm: 'PCs'
    layers: 'matrix', 'ambiguous', 'spliced', 'unspliced'
    obsp: 'distances', 'connectivities'
In [123]:
allt = tvel

sc.tl.pca(allt, svd_solver='arpack')
sc.pp.neighbors(allt, n_neighbors=10, n_pcs=40)
sc.tl.umap(allt)
sc.tl.leiden(allt, resolution=0.5)
sc.pl.umap(allt, color=['leiden'])
computing PCA
    on highly variable genes
    with n_comps=50
    finished (0:00:01)
computing neighbors
    using 'X_pca' with n_pcs = 40
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:00)
computing UMAP
    finished: added
    'X_umap', UMAP coordinates (adata.obsm) (0:00:02)
running Leiden clustering
    finished: found 5 clusters and added
    'leiden', the cluster labels (adata.obs, categorical) (0:00:00)
In [124]:
cd8_markers= ['GZMA', 'CCL5', 'GZMB', 'KLRD1', 'NKG7', 'CD8A', 'KLRK1', 'LGALS1', 'KLRC1', 'KLRG1', 'LGALS3', 'GZMK', 'CX3CR1', 'PLAC8', 'ZEB2', 'CTSD', 'H2AFZ', 'EPSTI1', 'DNAJC15', 'CD48', 'CTSW', 'SELPLG', 'TMSB4X', 'PLEK', 'TM6SF1', 'S100A10', 'VIM', 'ZYX', 'ABRACL', 'CRIP1', 'RACGAP1', 'CYBA', 'CCR2', 'PYCARD', 'SUB1', 'RAP1B', 'GLIPR2', 'EMP3', 'KRTCAP2', 'ID2', 'RPA2', 'STMN1', 'AHNAK', 'DAPK2', 'OSTF1', 'ITGAX', 'ANXA6', 'CD47', 'ARL4C', 'RUNX3', 'IFNGR1', 'LFNG', 'RASGRP2', 'REEP5', 'CHSY1', 'IL18RAP', 'JAK1', 'SGK1', 'KLRC2', 'TBX21', 'SEMA4A', 'SLAMF7', 'TXNDC5', 'ST3GAL6', 'CCNB2', 'CTSC', 'BIRC5', 'ITGB2', 'SP100', 'LAMB3', 'UBE2C', 'ANXA2', 'CENPW', 'CALM1', 'COX5B', 'COX5A', 'SNX10', 'EFHD2', 'LSP1', 'SERPINB9', 'HMGB2', 'THY1', 'RBM3', 'SNX5', 'PPP3CA', 'ARL6IP5', 'CKS1B', 'SEC61B', 'GNA15', 'NPM3', 'UBE2G2', 'ACTB', 'UGCG', 'S1PR4', 'CLIC1', 'LMNB1', 'CKS2', 'TPM4', 'TAGLN2', 'NDUFB7', 'PTP4A3', 'H2AFY', 'HMGN2', 'YWHAQ', 'GGH', 'DNAJC9', 'CMPK1', 'GIMAP7', 'CCND3', 'HCST', 'SMC2', 'ANAPC5', 'DEK', 'LSM5', 'CMTM7', 'PCNA', 'H2AFV', 'RAN', 'ANP32E', 'CENPA', 'SLBP', 'DUT', 'TMPO', 'H2AFX', 'TUBB4B', 'UBE2S', 'TUBA1B']

sc.tl.score_genes(allt, cd8_markers, score_name="cd8_ciucci")
sc.pl.violin(allt, keys="cd8_ciucci", groupby="leiden")
computing score 'cd8_ciucci'
    finished: added
    'cd8_ciucci', score of gene set (adata.obs).
    995 total control genes are used. (0:00:00)
In [126]:
treg_markers = ['FOXP3', 'IKZF2', 'TNFRSF4', 'CAPG', 'TNFRSF18', 'CD74', 'CTLA4', 'RGS1', 'CCND2', 'SELL', 'SERINC3', 'SAMSN1', 'IFNGR1', 'GIMAP7', 'LTB', 'BTG1', 'IL7R', 'SDF4', 'CD2', 'SHISA5', 'GPX4', 'MBNL1', 'PELI1']

sc.tl.score_genes(allt, treg_markers, score_name="treg_ciucci")
sc.pl.violin(allt, keys="treg_ciucci", groupby="leiden")
computing score 'treg_ciucci'
    finished: added
    'treg_ciucci', score of gene set (adata.obs).
    297 total control genes are used. (0:00:00)
In [127]:
tfh_markers = ['PDCD1', 'RGS10', 'CXCR5', 'TOX', 'TOX2', 'PTRH1', 'ANGPTL2', 'MARCKSL1', 'SMCO4', 'TNFSF8', 'PPP1R14B', 'TNFAIP8', 'HIF1A', 'LPP', 'MAF', 'CD160', 'SH2D1A', 'CD200', 'TBC1D4', 'TPI1', 'RPSA', 'HMGB1', 'ICOS', 'GAPDH', 'PRKCA', 'PTPRCAP', 'ZAP70', 'PTPN11', 'DENND2D', 'ZFP36L1', 'FYN', 'GDI2', 'LIMD2', 'PKM', 'NT5E', 'CTSB', 'PFKL', 'PGAM1', 'MATK', 'TRIM8', 'COX14', 'ASAP1', 'GNG2', 'P2RX7', 'LRMP', 'CD3G', 'ALDOA', 'GNA13', 'ISG15', 'RAB37', 'MMD', 'FAM162A', 'BORCS8', 'DDIT4', 'PTP4A2', 'CD82', 'MIF', 'PFKP', 'GIMAP5', 'EEA1', 'BATF']

sc.tl.score_genes(allt, tfh_markers, score_name="tfh_ciucci")
sc.pl.violin(allt, keys="tfh_ciucci", groupby="leiden")
computing score 'tfh_ciucci'
    finished: added
    'tfh_ciucci', score of gene set (adata.obs).
    643 total control genes are used. (0:00:00)
In [129]:
list_TCells=["CD8", "CD3E", "CD4"]
list_NKCells=["NCAM1","GZMK", "GNLY", "GZMA"]
list_cytoTOXIC=["GZMK", "GNLY", "GZMA", "CD8A", "CD8B"]

list_Tregs=["IL2RA", "FOXP3", "CTLA4", "LAG3", "TNFRSF18", "IKZF2", "IKZF4", "LGMN"]
list_Th1=["IFNG", "TBX21", "CD4"]
list_Th2=["GATA3", "CRTH2", "IL4", "IL13", "CD4"]
list_Th17=["CD161", "CCR4", "CD4"]
list_TFH=["PDCD1", "CXCR5", "BCL6", "CD4"]

list_NaiveT=["CCR7", "IL7R", "LEF1"]
list_ActivatedT=["IL2RA"] #CD25


sc.tl.score_genes(allt, list_Tregs, score_name="treg_shah")
sc.pl.violin(allt, keys="treg_shah", groupby="leiden")

sc.tl.score_genes(allt, list_Th1, score_name="th1_shah")
sc.pl.violin(allt, keys="th1_shah", groupby="leiden")

sc.tl.score_genes(allt, list_Th2, score_name="th2_shah")
sc.pl.violin(allt, keys="th2_shah", groupby="leiden")

sc.tl.score_genes(allt, list_Th17, score_name="th17_shah")
sc.pl.violin(allt, keys="th17_shah", groupby="leiden")

sc.tl.score_genes(allt, list_TFH, score_name="tfh_shah")
sc.pl.violin(allt, keys="tfh_shah", groupby="leiden")

sc.tl.score_genes(allt, list_NaiveT, score_name="naive_shah")
sc.pl.violin(allt, keys="naive_shah", groupby="leiden")

sc.tl.score_genes(allt, list_ActivatedT, score_name="active_shah")
sc.pl.violin(allt, keys="active_shah", groupby="leiden")
computing score 'treg_shah'
    finished: added
    'treg_shah', score of gene set (adata.obs).
    250 total control genes are used. (0:00:00)
computing score 'th1_shah'
    finished: added
    'th1_shah', score of gene set (adata.obs).
    150 total control genes are used. (0:00:00)
computing score 'th2_shah'
WARNING: genes are not in var_names and ignored: ['CRTH2']
    finished: added
    'th2_shah', score of gene set (adata.obs).
    199 total control genes are used. (0:00:00)
computing score 'th17_shah'
WARNING: genes are not in var_names and ignored: ['CD161']
    finished: added
    'th17_shah', score of gene set (adata.obs).
    50 total control genes are used. (0:00:00)
computing score 'tfh_shah'
    finished: added
    'tfh_shah', score of gene set (adata.obs).
    200 total control genes are used. (0:00:00)
computing score 'naive_shah'
    finished: added
    'naive_shah', score of gene set (adata.obs).
    99 total control genes are used. (0:00:00)
computing score 'active_shah'
WARNING: genes are not in var_names and ignored: ['CD25']
WARNING: provided gene list has length 0, scores as 0
In [131]:
list_ActivatedT=["IL2RA"] #CD25
sc.tl.score_genes(allt, list_ActivatedT, score_name="active_shah")
sc.pl.violin(allt, keys="active_shah", groupby="leiden")
computing score 'active_shah'
    finished: added
    'active_shah', score of gene set (adata.obs).
    50 total control genes are used. (0:00:00)
In [132]:
list_cytoTOXIC=["GZMK", "GNLY", "GZMA", "CD8A", "CD8B"]
sc.tl.score_genes(allt, list_cytoTOXIC, score_name="cd8_shah")
sc.pl.violin(allt, keys="cd8_shah", groupby="leiden")
computing score 'cd8_shah'
    finished: added
    'cd8_shah', score of gene set (adata.obs).
    200 total control genes are used. (0:00:00)
In [133]:
sc.pl.violin(allt, keys=["CD8A", "CD8B"], groupby="leiden")
In [137]:
sc.pl.umap(allt, color="CD4")
In [ ]: